package com.dmp.total


import java.sql.{Connection, DriverManager}

import com.dmp.config.ConfigHandler
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.log4j.{Level, Logger}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}

object Sql4Parquet {

  /**
   * Spark job: reads an ad-log parquet dataset, aggregates request counts
   * per (provincename, cityname), prints the result, and persists it to the
   * JDBC table `orc_report_province` configured via [[ConfigHandler]].
   *
   * Expects exactly 3 CLI arguments (see usage below); exits with code 101
   * when the argument count is wrong.
   */
  def main(args: Array[String]): Unit = {
    if (args.length != 3) {
      // The guard requires 3 args, so the usage text must document all 3
      // and reference this class (the original message named an unrelated
      // class and listed only 2 arguments).
      println(
        """
          |Usage:
          | com.dmp.total.Sql4Parquet
          | args:
          |     dataInputPath:  原始日志输入路径
          |     dataOutputPath: parquet存储路径
          |     jsonFilePath:   json文件路径
        """.stripMargin)
      sys.exit(101) // 101: 参数不合法 $?
    }
    // NOTE(review): only `inpath` is used below; `outpath` and `jsonFilePath`
    // are accepted for CLI compatibility but currently ignored — the JDBC
    // target comes from ConfigHandler. Confirm whether they should be wired in.
    val Array(inpath, outpath, jsonFilePath) = args

    // Silence Spark's verbose INFO logging.
    Logger.getLogger("org").setLevel(Level.WARN)

    val sparkConf = new SparkConf()
    sparkConf.setAppName("查询parquet文件中的内容")
    // NOTE(review): master is hard-coded to local mode; remove this (and pass
    // --master on spark-submit) before deploying to a cluster.
    sparkConf.setMaster("local[*]")
    // Kryo is faster/more compact than Java serialization for shuffles.
    sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

    val sc = new SparkContext(sparkConf)
    val sQLContext = new SQLContext(sc)

    // Register the parquet data as a temp table so it can be queried with SQL.
    // (registerTempTable is the Spark 1.x API; kept for compatibility with
    // the SQLContext usage in this file.)
    val dataFrame = sQLContext.read.parquet(inpath)
    dataFrame.registerTempTable("adlog")

    // Count ad-log rows per province/city.
    val sql =
      "select count(*) ct,provincename,cityname from adlog group by provincename,cityname order by provincename"
    val result: DataFrame = sQLContext.sql(sql)
    result.show()

    /*result.coalesce(4)
        .write.json("f:/violet/report/json")*/

    // Persist the aggregated report; connection URL/credentials come from config.
    result.write.jdbc(ConfigHandler.url, "orc_report_province", ConfigHandler.prop)

    sc.stop()
  }
}
